## Section 5 of the Online Appendix: Specification checks

library(haven)
library(dplyr)
library(ggplot2)
library(gridExtra)
library(grid)
library(reshape2)
library(grid)
library(rdrobust)
library(rdlocrand)
library(lubridate)


## Clear workspace

rm=list(ls())

##Opening the voting data:

load("ReplicationData.RData")

## Turnout variable
data2010$turnout <- as.numeric(data2010$VOTOU.1º.TURNO) - 1

#####################
###### 2010 #########
#####################

####################################################################################################
###  Inspection of baseline covariates at critical moments of interest: gender (ratio men/women) ###
####################################################################################################

## Create a dummy for female voters

data2010$female <- as.numeric(data2010$GÊNERO == 'FEMININO')
data2010$male <- as.numeric(data2010$GÊNERO == 'MASCULINO')


## Table S5.6

## Election Day: subsetting the 14-day window

data.18.rddED <- dplyr::filter(data2010, (dob >= "1992-09-27" & dob <= "1992-10-10") & turnout == 1)

DataRDDED18 <- data.18.rddED %>%
  filter(GRAU.INSTRUÇÃO != "Analfabeto" | GÊNERO != 'NÃO INFORMADO') %>%
  group_by(dob) %>%
  summarise(voters = n(),
            female = sum(female),
            male = sum(male))

DataRDDED18$daysToFrom <- seq(6, -7, by = -1)

##Identifying weekend days and holidays:

DataRDDED18$WeekendHolidays <- as.numeric(weekdays(DataRDDED18$dob) %in% c("Saturday", "Sunday"))

##Excluding weekend days:

DataRDDED18nowe <- DataRDDED18[ ! DataRDDED18$WeekendHolidays==1,]

DataRDDED18nowe$obliged <- as.numeric(DataRDDED18nowe$daysToFrom >= 0)

##Difference in proportions:

tPropWomenED18nowe <- prop.test(x=c(sum(DataRDDED18nowe$female[DataRDDED18nowe$obliged==0]),sum(DataRDDED18nowe$female[DataRDDED18nowe$obliged==1])), n=c(sum(DataRDDED18nowe$voters[DataRDDED18nowe$obliged==0]),sum(DataRDDED18nowe$voters[DataRDDED18nowe$obliged==1])))

print(tPropWomenED18nowe)



## End-of-year: subsetting the 14-day window

data.18.rddEoY <- dplyr::filter(data2010, (dob >= "1992-12-25" & dob <= "1993-01-07") & turnout == 1)

DataRDDEoY18 <- data.18.rddEoY %>%
  filter(GRAU.INSTRUÇÃO != "Analfabeto" | GÊNERO != 'NÃO INFORMADO') %>%
  group_by(dob) %>%
  summarise(voters = n(),
            female = sum(female),
            male = sum(male))

DataRDDEoY18$daysToFrom <- seq(6, -7, by = -1)

##Identifying weekend days and holidays:

DataRDDEoY18$WeekendHolidays <- as.numeric(weekdays(DataRDDEoY18$dob) %in% c("Saturday", "Sunday") | DataRDDEoY18$dob=="1992-12-24" | DataRDDEoY18$dob=="1992-12-25" | DataRDDEoY18$dob=="1992-12-31" | DataRDDEoY18$dob=="1993-01-01")

##Excluding weekend days:

DataRDDEoY18nowe <- DataRDDEoY18[ ! DataRDDEoY18$WeekendHolidays==1,]

DataRDDEoY18nowe$obliged <- as.numeric(DataRDDEoY18nowe$daysToFrom >= 0)

##Difference in proportions:

tPropWomenEoY18nowe <- prop.test(x=c(sum(DataRDDEoY18nowe$female[DataRDDEoY18nowe$obliged==0]),sum(DataRDDEoY18nowe$female[DataRDDEoY18nowe$obliged==1])), n=c(sum(DataRDDEoY18nowe$voters[DataRDDEoY18nowe$obliged==0]),sum(DataRDDEoY18nowe$voters[DataRDDEoY18nowe$obliged==1])))

print(tPropWomenEoY18nowe)



## RDD by gender: Table S5.7

## Election Day

## Women

rddED18noweWomen <- rdrandinf(DataRDDED18nowe$female, DataRDDED18nowe$daysToFrom, wl = -7, wr = 6, seed = 50, ci = .05)

## % increase
rddED18noweWomen$obs.stat/rddED18noweWomen$sumstats[3,1]*100

## % increase: lower bound
rddED18noweWomen$ci[1]/rddED18noweWomen$sumstats[3,1]*100

## % increase: upper bound
rddED18noweWomen$ci[2]/rddED18noweWomen$sumstats[3,1]*100

## Men

rddED18noweMen <- rdrandinf(DataRDDED18nowe$male, DataRDDED18nowe$daysToFrom, wl = -7, wr = 6, seed = 50, ci = .05)

## % increase
rddED18noweMen$obs.stat/rddED18noweMen$sumstats[3,1]*100

## % increase: lower bound
rddED18noweMen$ci[1]/rddED18noweMen$sumstats[3,1]*100

## % increase: upper bound
rddED18noweMen$ci[2]/rddED18noweMen$sumstats[3,1]*100


## End-of-year

## Women
rddEoY18noweWomen <- rdrandinf(DataRDDEoY18nowe$female, DataRDDEoY18nowe$daysToFrom, wl = -7, wr = 6, seed = 50, ci = .05)

## % increase
rddEoY18noweWomen$obs.stat/rddEoY18noweWomen$sumstats[3,1]*100

## % increase: lower bound
rddEoY18noweWomen$ci[1]/rddEoY18noweWomen$sumstats[3,1]*100

## % increase: upper bound
rddEoY18noweWomen$ci[2]/rddEoY18noweWomen$sumstats[3,1]*100


## Men

rddEoY18noweMen <- rdrandinf(DataRDDEoY18nowe$male, DataRDDEoY18nowe$daysToFrom, wl = -7, wr = 6, seed = 50, ci = .05)

## % increase
rddEoY18noweMen$obs.stat/rddEoY18noweMen$sumstats[3,1]*100

## % increase: lower bound
rddEoY18noweMen$ci[1]/rddEoY18noweMen$sumstats[3,1]*100

## % increase: upper bound
rddEoY18noweMen$ci[2]/rddEoY18noweMen$sumstats[3,1]*100



## Gender differences: Online Appendix section S5.1

print_labels(SurveyData$sex)

## Knowledge by gender among young voters:

## Women

DataWomen <- subset(SurveyData, (sex == 2 & age<25))

##Frequencies of knowledge questions: 1 correct answer, 0 otherwise

DataWomen$know1 <- as.numeric(DataWomen$P5A.1 == 1)
know1Women <- table(DataWomen$know1)
prop.table(know1Women)*100

DataWomen$know3 <- as.numeric(DataWomen$P5B.1 == 2)
know3Women <- table(DataWomen$know3)
prop.table(know3Women)*100


## Men

DataMen <- subset(SurveyData, (sex == 1 & age<25))

##Frequencies of knowledge questions: 1 correct answer, 0 otherwise

DataMen$know1 <- as.numeric(DataMen$P5A.1 == 1)
know1Men <- table(DataMen$know1)
prop.table(know1Men)*100

DataMen$know3 <- as.numeric(DataMen$P5B.1 == 2)
know3Men <- table(DataMen$know3)
prop.table(know3Men)*100


## Women vs Men: binomial proportion test

## Knowledge item 1:

Edup11 <- know1Women[2]/sum(know1Women)
Edun11 <- sum(know1Women)

Edup21 <- know1Men[2]/sum(know1Men)
Edun21 <- sum(know1Men)

Edup1 <- (Edun11 * Edup11 + Edun21 * Edup21)/ (Edun11 + Edun21)
Eduz1 <- (Edup11 - Edup21) / sqrt(Edup1 * (1-Edup1) * (1/Edun11 + 1/Edun21))
Eduz1

(1 - pnorm(Eduz1, 0, 1))*2

## Knowledge item 3:

Edup13 <- know3Women[2]/sum(know3Women)
Edun13 <- sum(know3Women)

Edup23 <- know3Men[2]/sum(know3Men)
Edun23 <- sum(know3Men)

Edup3 <- (Edun13 * Edup13 + Edun23 * Edup23)/ (Edun13 + Edun23)
Eduz3 <- (Edup13 - Edup23) / sqrt(Edup3 * (1-Edup3) * (1/Edun13 + 1/Edun23))
Eduz3

(1 - pnorm(Eduz3, 0, 1))*2



######################
### Placebo tests ####
######################

## Table S5.8

## 17-year olds

## Election Day: subsetting the 14-day window

data.17.rddED <- dplyr::filter(data2010, (dob >= "1993-09-27" & dob <= "1993-10-10") & turnout == 1)

DataRDDED17 <- data.17.rddED %>%
  filter(GRAU.INSTRUÇÃO != "Analfabeto") %>%
  group_by(dob) %>%
  summarise(voters = n())

DataRDDED17$daysToFrom <- seq(6, -7, by = -1)

##Identifying weekend days and holidays:

DataRDDED17$WeekendHolidays <- as.numeric(weekdays(DataRDDED17$dob) %in% c("Saturday", "Sunday"))

##Excluding weekend days:

DataRDDED17nowe <- DataRDDED17[ ! DataRDDED17$WeekendHolidays==1,]

## RDD

rddED17nowe <- rdrandinf(DataRDDED17nowe$voters, DataRDDED17nowe$daysToFrom, wl = -7, wr = 6, seed = 50, ci = .05)

## % increase
rddED17nowe$obs.stat/rddED17nowe$sumstats[3,1]*100

## % increase: lower bound
rddED17nowe$ci[1]/rddED17nowe$sumstats[3,1]*100

## % increase: upper bound
rddED17nowe$ci[2]/rddED17nowe$sumstats[3,1]*100


## End-of-year: subsetting the 14-day window

data.17.rddEoY <- dplyr::filter(data2010, (dob >= "1993-12-25" & dob <= "1994-01-07") & turnout == 1)

DataRDDEoY17 <- data.17.rddEoY %>%
  filter(GRAU.INSTRUÇÃO != "Analfabeto") %>%
  group_by(dob) %>%
  summarise(voters = n(),
            female = sum(female),
            male = sum(male))

DataRDDEoY17$daysToFrom <- seq(6, -7, by = -1)

##Identifying weekend days and holidays:

DataRDDEoY17$WeekendHolidays <- as.numeric(weekdays(DataRDDEoY17$dob) %in% c("Saturday", "Sunday") | DataRDDEoY17$dob=="1993-12-24" | DataRDDEoY17$dob=="1993-12-25" | DataRDDEoY17$dob=="1993-12-31" | DataRDDEoY17$dob=="1994-01-01")

##Excluding weekend days:

DataRDDEoY17nowe <- DataRDDEoY17[ ! DataRDDEoY17$WeekendHolidays==1,]

## RDD

rddEoY17nowe <- rdrandinf(DataRDDEoY17nowe$voters, DataRDDEoY17nowe$daysToFrom, wl = -7, wr = 6, seed = 50, ci = .05)

## % increase
rddEoY17nowe$obs.stat/rddEoY17nowe$sumstats[3,1]*100

## % increase: lower bound
rddEoY17nowe$ci[1]/rddEoY17nowe$sumstats[3,1]*100

## % increase: upper bound
rddEoY17nowe$ci[2]/rddEoY17nowe$sumstats[3,1]*100


## 19-year olds

## Election Day: subsetting the 14-day window

data.19.rddED <- dplyr::filter(data2010, (dob >= "1991-09-27" & dob <= "1991-10-10") & turnout == 1)

DataRDDED19 <- data.19.rddED %>%
  filter(GRAU.INSTRUÇÃO != "Analfabeto") %>%
  group_by(dob) %>%
  summarise(voters = n())

DataRDDED19$daysToFrom <- seq(6, -7, by = -1)

##Identifying weekend days and holidays:

DataRDDED19$WeekendHolidays <- as.numeric(weekdays(DataRDDED19$dob) %in% c("Saturday", "Sunday"))

##Excluding weekend days:

DataRDDED19nowe <- DataRDDED19[ ! DataRDDED19$WeekendHolidays==1,]

## RDD

rddED19nowe <- rdrandinf(DataRDDED19nowe$voters, DataRDDED19nowe$daysToFrom, wl = -7, wr = 6, seed = 50, ci = .05)

## % increase
rddED19nowe$obs.stat/rddED19nowe$sumstats[3,1]*100

## % increase: lower bound
rddED19nowe$ci[1]/rddED19nowe$sumstats[3,1]*100

## % increase: upper bound
rddED19nowe$ci[2]/rddED19nowe$sumstats[3,1]*100


## End-of-year: subsetting the 14-day window

data.19.rddEoY <- dplyr::filter(data2010, (dob >= "1991-12-25" & dob <= "1992-01-07") & turnout == 1)

DataRDDEoY19 <- data.19.rddEoY %>%
  filter(GRAU.INSTRUÇÃO != "Analfabeto") %>%
  group_by(dob) %>%
  summarise(voters = n(),
            female = sum(female),
            male = sum(male))

DataRDDEoY19$daysToFrom <- seq(6, -7, by = -1)

##Identifying weekend days and holidays:

DataRDDEoY19$WeekendHolidays <- as.numeric(weekdays(DataRDDEoY19$dob) %in% c("Saturday", "Sunday") | DataRDDEoY19$dob=="1991-12-24" | DataRDDEoY19$dob=="1991-12-25" | DataRDDEoY19$dob=="1991-12-31" | DataRDDEoY19$dob=="1992-01-01")

##Excluding weekend days:

DataRDDEoY19nowe <- DataRDDEoY19[ ! DataRDDEoY19$WeekendHolidays==1,]

## RDD

rddEoY19nowe <- rdrandinf(DataRDDEoY19nowe$voters, DataRDDEoY19nowe$daysToFrom, wl = -7, wr = 6, seed = 50, ci = .05)

## % increase
rddEoY19nowe$obs.stat/rddEoY19nowe$sumstats[3,1]*100

## % increase: lower bound
rddEoY19nowe$ci[1]/rddEoY19nowe$sumstats[3,1]*100

## % increase: upper bound
rddEoY19nowe$ci[2]/rddEoY19nowe$sumstats[3,1]*100




################################
### S5.3 Bandwidth analysis ####
################################

## Election Day: subsetting a 42-day window

data.18.rddEDBand <- dplyr::filter(data2010, (dob >= "1992-09-13" & dob <= "1992-10-24") & turnout == 1)

DataRDDED18Band <- data.18.rddEDBand %>%
  filter(GRAU.INSTRUÇÃO != "Analfabeto") %>%
  group_by(dob) %>%
  summarise(voters = n())

DataRDDED18Band$daysToFrom <- seq(20, -21, by = -1)

##Identifying weekend days and holidays:

DataRDDED18Band$WeekendHolidays <- as.numeric(weekdays(DataRDDED18Band$dob) %in% c("Saturday", "Sunday") | DataRDDED18Band$dob=="1992-10-12")

##Excluding weekend days:

DataRDDED18Bandnowe <- DataRDDED18Band[ ! DataRDDED18Band$WeekendHolidays==1,]

DataRDDED18Bandnowe$obliged <- as.numeric(DataRDDED18Bandnowe$daysToFrom >= 0)

##Using the rdrandinf package: starting at three obs on each side

rddEstimatesED <- list(); estimatesEDyoung <- list(); pvalueEDyoung <- list(); windowEDyoung <- list()

for (i in 4:21){

    rddEstimatesED[[i]] <- rdrandinf(DataRDDED18Bandnowe$voters, DataRDDED18Bandnowe$daysToFrom, cutoff = 0, wl = -i, wr = i-1, seed = 12345, ci = .05)

    estimatesEDyoung[[i]] <- rddEstimatesED[[i]]$obs.stat[1]
    pvalueEDyoung[[i]] <- rddEstimatesED[[i]]$p.value[1]
    windowEDyoung[[i]] <- rddEstimatesED[[i]]$window

}

EDyoung.estimates <- data.frame(matrix(unlist(estimatesEDyoung), nrow=length(Filter(Negate(is.null), estimatesEDyoung)), byrow=T),stringsAsFactors=FALSE)
EDyoung.pvalue <- data.frame(matrix(unlist(pvalueEDyoung), nrow=length(Filter(Negate(is.null), pvalueEDyoung)), byrow=T),stringsAsFactors=FALSE)
EDyoung.window <- data.frame(matrix(unlist(windowEDyoung), nrow=length(Filter(Negate(is.null), windowEDyoung)), byrow=T),stringsAsFactors=FALSE)


tmp <- data.frame(
    bandwidth = seq(3, 20, by = 1),  
    EDyoung.estimates,
    EDyoung.pvalue,
    sig = factor(as.numeric(EDyoung.pvalue < .05), labels = c( "p-value \u2265 .05", "p-value < .05"))
)

colnames(tmp)[2] <- "effect"
colnames(tmp)[3] <- "pvalue"

tmp$id <- factor(ifelse(tmp$bandwidth == 6, 1, 2), labels = c("nsns", "dve"))

## Plot effects: Figure S5.3

EDBand2010 <- ggplot(tmp, aes(x=bandwidth, y=effect, colour = sig, shape = id)) + geom_point(size = 2) + ylim(500,1750) +
    theme_bw() + ggtitle("Election Day") + labs(colour="Statistical significance") + xlab("Bandwidth (ignoring weekends and holidays)") + ylab("Estimated treatment effect") +
    theme(legend.position="bottom") + theme(legend.title = element_blank()) + scale_shape_manual(values = c(19, 17), guide = FALSE)

ggsave("~/Dropbox/Documents/Projects/Active_Projects/Compulsory_Voting_BR/Replication_files/PSRM/analysis_bandwidth_ED_2010.png", plot = EDBand2010)


## End-of-year: subsetting a 42-day window

data.18.rddEoYBand <- dplyr::filter(data2010, (dob >= "1992-12-11" & dob <= "1993-01-21") & turnout == 1)

DataRDDEoY18Band <- data.18.rddEoYBand %>%
  filter(GRAU.INSTRUÇÃO != "Analfabeto") %>%
  group_by(dob) %>%
  summarise(voters = n())

DataRDDEoY18Band$daysToFrom <- seq(20, -21, by = -1)

##Identifying weekend days and holidays:

DataRDDEoY18Band$WeekendHolidays <- as.numeric(weekdays(DataRDDEoY18Band$dob) %in% c("Saturday", "Sunday") | DataRDDEoY18Band$dob=="1992-12-24" | DataRDDEoY18Band$dob=="1992-12-25" | DataRDDEoY18Band$dob=="1992-12-31" | DataRDDEoY18Band$dob=="1993-01-01")

##Excluding weekend days:

DataRDDEoY18Bandnowe <- DataRDDEoY18Band[ ! DataRDDEoY18Band$WeekendHolidays==1,]

DataRDDEoY18Bandnowe$obliged <- as.numeric(DataRDDEoY18Bandnowe$daysToFrom >= 0)

##Using the rdrandinf package: starting at three obs on each side

rddEstimatesEoY <- list(); estimatesEoYyoung <- list(); pvalueEoYyoung <- list(); windowEoYyoung <- list()

for (i in 6:21){

    rddEstimatesEoY[[i]] <- rdrandinf(DataRDDEoY18Bandnowe$voters, DataRDDEoY18Bandnowe$daysToFrom, cutoff = 0, wl = -i, wr = i-1, seed = 12345, ci = .05)

    estimatesEoYyoung[[i]] <- rddEstimatesEoY[[i]]$obs.stat[1]
    pvalueEoYyoung[[i]] <- rddEstimatesEoY[[i]]$p.value[1]
    windowEoYyoung[[i]] <- rddEstimatesEoY[[i]]$window

}

EoYyoung.estimates <- data.frame(matrix(unlist(estimatesEoYyoung), nrow=length(Filter(Negate(is.null), estimatesEoYyoung)), byrow=T),stringsAsFactors=FALSE)
EoYyoung.pvalue <- data.frame(matrix(unlist(pvalueEoYyoung), nrow=length(Filter(Negate(is.null), pvalueEoYyoung)), byrow=T),stringsAsFactors=FALSE)
EoYyoung.window <- data.frame(matrix(unlist(windowEoYyoung), nrow=length(Filter(Negate(is.null), windowEoYyoung)), byrow=T),stringsAsFactors=FALSE)


tmp <- data.frame(
    bandwidth = seq(3, 18, by = 1),  
    EoYyoung.estimates,
    EoYyoung.pvalue,
    sig = factor(as.numeric(EoYyoung.pvalue < .05), labels = c( "p-value \u2265 .05", "p-value < .05"))
)

colnames(tmp)[2] <- "effect"
colnames(tmp)[3] <- "pvalue"

tmp$id <- factor(ifelse(tmp$bandwidth == 4, 1, 2), labels = c("nsns", "dve"))

## Plot effects: Figure S5.4

EoYBand2010 <- ggplot(tmp, aes(x=bandwidth, y=effect, colour = sig, shape = id)) + geom_point(size = 2) + ylim(350,475) +
    theme_bw() + ggtitle("End of Year") + labs(colour="Statistical significance") + xlab("Bandwidth (ignoring weekends and holidays)") + ylab("Estimated treatment effect") +
    theme(legend.position="bottom") + theme(legend.title = element_blank()) + scale_shape_manual(values = c(19, 17), guide = FALSE)

ggsave("analysis_bandwidth_EoY_2010.png", plot = EoYBand2010)



###############################
## Evaluating other cutoffs ###
###############################

## Figures S5.5 and S5.6

## End-of-year: subsetting the 14-day window

data.18.rddOthers <- dplyr::filter(data2010, (dob >= "1992-10-11" & dob <= "1993-02-08") & turnout == 1)

DataRDDOthers18 <- data.18.rddOthers %>%
  filter(GRAU.INSTRUÇÃO != "Analfabeto") %>%
  group_by(dob) %>%
  summarise(voters = n(),
            female = sum(female),
            male = sum(male))

## Reverse order of data

DataRDDOthers18rev <- DataRDDOthers18[seq(dim(DataRDDOthers18)[1],1),]

running <- seq(1, 121, by = 1)

DataRDD18full <- cbind(DataRDDOthers18rev, running) 


##Identifying weekend days and holidays:

DataRDD18full$WeekendHolidays <- as.numeric(weekdays(DataRDD18full$dob) %in% c("Saturday", "Sunday") | DataRDD18full$dob=="1992-10-12" | DataRDD18full$dob=="1992-11-02" | DataRDD18full$dob=="1992-11-15" | DataRDD18full$dob=="1992-12-24" | DataRDD18full$dob=="1992-12-25" | DataRDD18full$dob=="1992-12-31" | DataRDD18full$dob=="1993-01-01")

##Excluding weekend days:

DataRDD18fullnowe <- DataRDD18full[ ! DataRDD18full$WeekendHolidays==1,]


## First period from 1992-10-11 to 1992-12-23

DataRDD18fullnowe1 <- subset(DataRDD18fullnowe, running >= 48)

## RDD from 1992-10-18 to 1992-12-16

rddEstimates1 <- list(); estimatesRDD1 <- list(); pvalueRDD1 <- list(); denominatorRDD1 <- list()

for (i in 55:113){

    rddEstimates1[[i]] <- rdrandinf(DataRDD18fullnowe1$voters, DataRDD18fullnowe1$running, cutoff = i, wl = i-7, wr = i+6, seed = 12345, ci = .05)

    estimatesRDD1[[i]] <- rddEstimates1[[i]]$obs.stat[1]
    pvalueRDD1[[i]] <- rddEstimates1[[i]]$p.value[1]
    denominatorRDD1[[i]] <- rddEstimates1[[i]]$sumstats[3,1]

}

RDD1.estimates <- data.frame(matrix(unlist(estimatesRDD1), nrow=length(Filter(Negate(is.null), estimatesRDD1)), byrow=T),stringsAsFactors=FALSE)

RDD1.pvalue <- data.frame(matrix(unlist(pvalueRDD1), nrow=length(Filter(Negate(is.null), pvalueRDD1)), byrow=T),stringsAsFactors=FALSE)

RDD1.denominator <- data.frame(matrix(unlist(denominatorRDD1), nrow=length(Filter(Negate(is.null), denominatorRDD1)), byrow=T),stringsAsFactors=FALSE)

RDD1data <- subset(DataRDD18full, (running >=55 & running <=113))
RDD1 <- cbind(RDD1data, RDD1.estimates, RDD1.pvalue, RDD1.denominator)
RDD1nowe <- RDD1[ ! RDD1$WeekendHolidays==1,]

colnames(RDD1nowe)[7] <- "estimate"
colnames(RDD1nowe)[8] <- "pvalue"
colnames(RDD1nowe)[9] <- "denominator"


## Second period from 1993-01-07 to 1993-02-08

DataRDD18fullnowe2 <- subset(DataRDD18fullnowe, running < 33)

## RDD from 1993-01-14 to 1993-02-01

rddEstimates2 <- list(); estimatesRDD2 <- list(); pvalueRDD2 <- list(); denominatorRDD2 <- list()

for (i in 8:26){

    rddEstimates2[[i]] <- rdrandinf(DataRDD18fullnowe2$voters, DataRDD18fullnowe2$running, cutoff = i, wl = i-7, wr = i+6, seed = 12345, ci = .05)

    estimatesRDD2[[i]] <- rddEstimates2[[i]]$obs.stat[1]
    pvalueRDD2[[i]] <- rddEstimates2[[i]]$p.value[1]
    denominatorRDD2[[i]] <- rddEstimates2[[i]]$sumstats[3,1]
}

RDD2.estimates <- data.frame(matrix(unlist(estimatesRDD2), nrow=length(Filter(Negate(is.null), estimatesRDD2)), byrow=T),stringsAsFactors=FALSE)

RDD2.pvalue <- data.frame(matrix(unlist(pvalueRDD2), nrow=length(Filter(Negate(is.null), pvalueRDD2)), byrow=T),stringsAsFactors=FALSE)

RDD2.denominator <- data.frame(matrix(unlist(denominatorRDD2), nrow=length(Filter(Negate(is.null), denominatorRDD2)), byrow=T),stringsAsFactors=FALSE)

RDD2data <- subset(DataRDD18full, (running >=8 & running <=26))
RDD2 <- cbind(RDD2data, RDD2.estimates, RDD2.pvalue, RDD2.denominator)
RDD2nowe <- RDD2[ ! RDD2$WeekendHolidays==1,]

colnames(RDD2nowe)[7] <- "estimate"
colnames(RDD2nowe)[8] <- "pvalue"
colnames(RDD2nowe)[9] <- "denominator"


RDDFINAL <- rbind(RDD1nowe, RDD2nowe)

RDDFINAL$increase <- RDDFINAL$estimate/RDDFINAL$denominator*100

str(RDDFINAL)

RDDFINAL$padj <- p.adjust(RDDFINAL$pvalue, method="holm")

tmp <- data.frame(
    dob = rep(RDDFINAL$dob, 2),  
    p = c(RDDFINAL$pvalue, RDDFINAL$padj),  
    type = factor(rep(c(1,2), each=nrow(RDDFINAL)), labels=c("Raw", "Adjusted (Holm)"))
)


## Plot p-values

graph2010pvalue <- ggplot(tmp, aes(x=dob, y=p, colour=type)) + geom_point(pch=16) + geom_hline(yintercept=.05, col="gray50", linetype="dotted", size=.5) +  
    theme_bw() + labs(colour="Adjustment Type") + xlab("Date of birth (in 1992/93)") + ylab("p-value")

ggsave("other_cutoffs_pvalues_2010.png", plot = graph2010pvalue)


## Average increase in percentage among the significant differences: Figure S5.6

mean(RDDFINAL$increase[RDDFINAL$pvalue < .05])
RDDFINAL$increase[RDDFINAL$pvalue < .05]

tmp2 <- data.frame(
    dob = RDDFINAL$dob,  
    i = RDDFINAL$increase,
    p = RDDFINAL$pvalue
)

graph2010increase <- ggplot(subset(tmp2, p < .05), aes(x=dob, y=i)) +
    geom_point(pch=17) +
    geom_hline(yintercept=20.1, col="gray50", linetype="longdash", size=.5) +
    geom_hline(yintercept=12.3, col="gray50", linetype="longdash", size=.5) +
    annotate(geom="text", x= as.Date("1993-01-15"), y=19.6, label="Election Day") +
    annotate(geom="text", x= as.Date("1993-01-15"), y=11.8, label="End-of-year") +
    theme_bw() + xlab("Date of birth (in 1992/93)") + ylab("Estimate increase in %")

ggsave("other_cutoffs_estimates_2010.png", plot = graph2010increase)




















